## Snippet for PanelMatch Troubleshooting
## D Stanescu 7/12/2019


rm(list = ls(all = TRUE)) 
#install_github("insongkim/PanelMatch", dependencies=TRUE)

library(gdata)
library(reshape)
library(lubridate)
library(erer)
library(readxl)   
library(stringr)
library(dplyr)
library(ggplot2)
library(gridExtra)
library(panelView)
library(PanelMatch)
library(countrycode)
library(tidyr)
library(rstudioapi)
library(grid)
library(ggplotify)

# Set working directory
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

# Set seed
set.seed(2000)

# define a function to format names
simpleCap <- function(x) {
  s <- strsplit(x, " ")[[1]]
  paste(toupper(substring(s, 1,1)), substring(s, 2),
        sep="", collapse=" ")
}

###############################################
# Load and format data
###############################################

# Load yields data and format date
yields <- read.csv("_GFD_clean.csv")
yields$RDate <- as.Date(yields$RDate)

# Load events data and format date
events <- read.csv("_events.csv")
events$event.dates <- as.Date(events$event.dates, format = '%m/%d/%y')
colnames(events)[1] <- "date"

# Format country code
yields$ccode <- countrycode(yields$Country.Name, 'country.name', 'cown')

# Recode HK as 999 to keep in sample
yields$ccode[yields$Country.Name == "hong kong"] <- 999

# Remove obs with missing country code
yields <- yields[!is.na(yields$ccode), ]

###############################################
# ATE Panel Match Function
###############################################

# define function to format data and estimate panel match
pm.function <- function(data, events, daybeforeevent, dayafterevent, refinement){
  
  # Initialize output
  out <- list()
  
  # iterate over events
  for (i in 1:nrow(events)) {
    row <- events[i, ]
    
    # Initialize treatment column
    data$treatment <- NA
    
    # Subset yields to estimation + event windows
    currdata <- data[data$RDate >=  as.Date(as.character(row$date)) - daybeforeevent &  
                       data$RDate   <= as.Date(as.character(row$date)) + dayafterevent,]
    
    # Code treatment variable  
    for(j in 1:nrow(currdata)){
      datarow <- currdata[j, ]
      
      currdata[j,  "treatment"] <- ifelse(datarow$Region == row$region & 
                                            datarow$RDate >= as.Date(as.character(row$date)), 1, 0)
    }
    
    
    # Remove countries without sufficient pre-treatment periods
    countries <- as.data.frame(unique(currdata$Country.Name))
    
    for (k in  1:nrow(countries)){
      if(nrow(currdata[countries[k, 1] == currdata$Country.Name & 
                       currdata$RDate < as.Date(as.character(row$date)),]) < daybeforeevent/2){ # if lower bound estimation window too low, reset threshold 
        currdata <- currdata[!( currdata$Country.Name == countries[k, 1]),]} 
    }
    
    # Create time id that is numeric and consecutive
    currdata$date2 <- as.numeric(currdata$RDate)
    table <- as.data.frame(table(currdata$date2))
    table$Freq <- NULL
    colnames(table)[1] <- "date2"
    table$dateindex <- rownames(table)
    currdata <- merge(currdata, table, by = "date2", all.x = TRUE, all.y = FALSE)
    currdata$dateindex <- as.integer(currdata$dateindex)
    
    # Run panel match
    try({PM.results <- PanelMatch(lag = 5, time.id = "dateindex", unit.id = "ccode", 
               treatment = "treatment", refinement.method = refinement, 
               data = currdata, match.missing = T, 
               covs.formula = ~ I(lag(treatment, 1:5)) +
                 I(lag(Yield, 1:5)), 
               size.match = 5, qoi = "att",  
               outcome.var = "Yield",
               lead = 0:3)})
    
    # Estimate results
    try({out[[i]] <- PanelEstimate(sets = PM.results, 
                                   data = currdata)} ) 
  }
  return(out)
} 

###############################################
# Estimation
###############################################

# Define events to be estimated (those with sufficient coverage)
to_est <- c(2, 4, 8, 10:19, 23:25)

# Call pm function using malahanobis and cbps
pm_mal <- pm.function(yields, events[to_est, ], 30, 5, "mahalanobis")
pm_cbps <- pm.function(yields, events[to_est,], 30, 5, "CBPS.match")

###############################################
# Plot Results
###############################################

# Format names and indices
let <- letters[seq( from = 1, to = 16 )]

for(i in 1:nrow(events)){
events$label[i] <- simpleCap(events$names[i])
}

events$label[2] <- "Singh (West)"
events$label[19] <- "Singh (Asia)"


# cbps
for(i in 1:length(to_est)){
pdf(paste0("Figures/fig_7", let[i], ".pdf"))
plot(pm_cbps[[i]], xlab = "Days", ylab = "", main = events$label[i])
dev.off()
}

# mahalanobis
for(i in 1:length(to_est)){
  pdf(paste0("Figures/fig_8", let[i], ".pdf"))
  plot(pm_mal[[i]], xlab = "Days", ylab = "", main = events$label[i])
  dev.off()
}

